Module @1¶

Importing¶

In [215]:
import pandas as pd
import numpy as np

Making a pandas Series from a list and an array¶

In [216]:
arr = np.array([1, 2, 3, 4, 5])
pd.Series(arr)

lst = [0 , 1, 2, 3, 4, 5]
pd.Series(lst)

pd.Series(index = ['a', 'b', 'c'], data = [1, 2, 3])
Out[216]:
a    1
b    2
c    3
dtype: int64

repeat()¶

The repeat() method can be chained onto a newly created Series to repeat each element a given number of times. Note that the original index labels are repeated as well.

In [217]:
pd.Series(5).repeat(10)
Out[217]:
0    5
0    5
0    5
0    5
0    5
0    5
0    5
0    5
0    5
0    5
dtype: int64

pandas reset_index()¶

The reset_index() function rebuilds the index as a clean 0..n-1 RangeIndex; drop=True discards the old index instead of inserting it as a column.

In [218]:
pd.Series(5).repeat(10).reset_index(drop = True)
Out[218]:
0    5
1    5
2    5
3    5
4    5
5    5
6    5
7    5
8    5
9    5
dtype: int64
In [219]:
pd.Series([10,20]).repeat([5,4]).reset_index(drop = True) # repeat 10 five times and 20 four times
Out[219]:
0    10
1    10
2    10
3    10
4    10
5    20
6    20
7    20
8    20
dtype: int64
In [ ]:
 

Accessing the elements of a Series¶

In [220]:
s = pd.Series([1, 2, 3, 4, 5]).repeat([5, 4, 3, 2, 1]).reset_index(drop = True)
In [221]:
s[:]
Out[221]:
0     1
1     1
2     1
3     1
4     1
5     2
6     2
7     2
8     2
9     3
10    3
11    3
12    4
13    4
14    5
dtype: int64
In [222]:
s[0]
Out[222]:
1
In [223]:
s[:-1]
Out[223]:
0     1
1     1
2     1
3     1
4     1
5     2
6     2
7     2
8     2
9     3
10    3
11    3
12    4
13    4
dtype: int64
In [224]:
s[1:-2]
Out[224]:
1     1
2     1
3     1
4     1
5     2
6     2
7     2
8     2
9     3
10    3
11    3
12    4
dtype: int64
In [225]:
s[::-1]
Out[225]:
14    5
13    4
12    4
11    3
10    3
9     3
8     2
7     2
6     2
5     2
4     1
3     1
2     1
1     1
0     1
dtype: int64
In [226]:
s[6:-3]
Out[226]:
6     2
7     2
8     2
9     3
10    3
11    3
dtype: int64
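Plain `s[...]` indexing works above because the Series has a default RangeIndex; with a custom index, label-based and positional access diverge. A minimal sketch of the difference, on a toy Series (not the `s` above):

```python
import pandas as pd

s2 = pd.Series([10, 20, 30], index=['a', 'b', 'c'])

# .iloc is purely positional, .loc is label-based
print(s2.iloc[0])   # 10, the first element by position
print(s2.loc['b'])  # 20, the element labelled 'b'

# .loc slices include BOTH endpoints; positional slices exclude the stop
print(s2.loc['a':'b'].tolist())  # [10, 20]
print(s2.iloc[0:2].tolist())     # [10, 20]
```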
In [ ]:
 

(a) pandas Series agg() function¶

The pandas Series agg() function aggregates using one or more operations over the specified axis of the Series object.

In [227]:
sr = pd.Series([1,2,3,4,5,6,7,8,9,10]).repeat([5,4,3,2,1,5,4,3,2,1]).reset_index(drop = True)
# print(sr)
sr.agg(['sum', 'mean' , 'std' , 'min' , 'max' , 'median', 'count','var'], axis = 0)
Out[227]:
sum       145.000000
mean        4.833333
std         2.841604
min         1.000000
max        10.000000
median      5.500000
count      30.000000
var         8.074713
dtype: float64
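agg() also accepts user-defined callables mixed in with the string names; a small sketch (the `value_range` helper is hypothetical, not part of pandas):

```python
import pandas as pd

sr = pd.Series([1, 2, 3, 4, 5])

def value_range(x):
    # hypothetical helper: spread between max and min
    return x.max() - x.min()

result = sr.agg(['sum', value_range])
print(result['sum'])          # 15
print(result['value_range'])  # 4 -- the callable's name becomes the label
```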

(b) pandas abs() function¶

In [228]:
sr1 = pd.Series([-5,-7,1,7,-3]).repeat([5,4,3,2,1]).reset_index(drop = True)
sr1.abs()
Out[228]:
0     5
1     5
2     5
3     5
4     5
5     7
6     7
7     7
8     7
9     1
10    1
11    1
12    7
13    7
14    3
dtype: int64

(c) Appending Series¶

pd.concat([sr1, sr2]) is used to concatenate two or more Series objects.

In [229]:
sr = pd.Series([1,2,3,4,5,6,7,8,9,10])
sr1 = pd.Series([-5,-7,1,7,-3,0,8,-9])
sr2=pd.concat([sr1,sr])
sr2.reset_index(drop = True)
Out[229]:
0     -5
1     -7
2      1
3      7
4     -3
5      0
6      8
7     -9
8      1
9      2
10     3
11     4
12     5
13     6
14     7
15     8
16     9
17    10
dtype: int64
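pd.concat can renumber the result in the same call via ignore_index=True, which makes the separate reset_index() step unnecessary; a small sketch:

```python
import pandas as pd

a = pd.Series([1, 2])
b = pd.Series([3, 4])

# ignore_index=True assigns a fresh 0..n-1 index to the result
combined = pd.concat([a, b], ignore_index=True)
print(combined.index.tolist())  # [0, 1, 2, 3]
print(combined.tolist())        # [1, 2, 3, 4]
```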

(d) astype()¶

This function is used to change the data type of a pandas Series. Often an imported dataset does not have the data types we need, so we convert them with astype().

In [230]:
print(type(sr1[0]))
sr1.astype('float64')
# print(type(sr1[0]))   
<class 'numpy.int64'>
Out[230]:
0   -5.0
1   -7.0
2    1.0
3    7.0
4   -3.0
5    0.0
6    8.0
7   -9.0
dtype: float64
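astype() returns a new Series and leaves the original untouched, which is why the conversion above does not change `sr1` itself; to keep it, reassign. A small sketch:

```python
import pandas as pd

sr1 = pd.Series([-5, -7, 1])

as_float = sr1.astype('float64')
print(as_float.dtype)  # float64
print(sr1.dtype)       # int64 -- the original Series is unchanged

sr1 = sr1.astype('float64')  # reassign to keep the conversion
print(sr1.dtype)       # float64
```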

(e) Between function¶

The pandas between() method returns a boolean Series indicating whether each value lies between the first and second arguments (both endpoints inclusive by default).

In [231]:
sr = pd.Series([10,20,3,4,50,6,7,8,9,10])
sr.between(10,70)
Out[231]:
0     True
1     True
2    False
3    False
4     True
5    False
6    False
7    False
8    False
9     True
dtype: bool
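In recent pandas versions (1.3+, an assumption about your install) the boundaries can be controlled with the `inclusive` parameter, which accepts 'both', 'neither', 'left' or 'right'; a small sketch:

```python
import pandas as pd

sr = pd.Series([10, 40, 70])

print(sr.between(10, 70).tolist())                       # [True, True, True]
print(sr.between(10, 70, inclusive='neither').tolist())  # [False, True, False]
print(sr.between(10, 70, inclusive='left').tolist())     # [True, True, False]
```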

String Functions¶

(a) lower(), upper(), capitalize(), title() and strip() functions¶

In [232]:
ser = pd.Series(["Shubham Patel " , " Satyam Chourasia" , "Chandan Kumar  " , " Mradul Singh "])

print(ser.str.lower())
print('*'*50)
print(ser.str.upper())
print('*'*50)
print(ser.str.capitalize())
print('*'*50)
print(ser.str.title())



print('*'*50)
for i in ser:
    print(i,"-->" , end=" ")
    print(len(i))

print('*'*50)
ser = ser.str.strip()

for i in ser:
    print(i,"-->" , end=" ")
    print(len(i))
0       shubham patel 
1     satyam chourasia
2      chandan kumar  
3        mradul singh 
dtype: object
**************************************************
0       SHUBHAM PATEL 
1     SATYAM CHOURASIA
2      CHANDAN KUMAR  
3        MRADUL SINGH 
dtype: object
**************************************************
0       Shubham patel 
1     satyam chourasia
2      Chandan kumar  
3        mradul singh 
dtype: object
**************************************************
0       Shubham Patel 
1     Satyam Chourasia
2      Chandan Kumar  
3        Mradul Singh 
dtype: object
**************************************************
Shubham Patel  --> 14
 Satyam Chourasia --> 17
Chandan Kumar   --> 15
 Mradul Singh  --> 14
**************************************************
Shubham Patel --> 13
Satyam Chourasia --> 16
Chandan Kumar --> 13
Mradul Singh --> 12

(b) split() function¶

In [233]:
ser = pd.Series(["Shubham Patel" , "Satyam Chourasia" , "Chandan Kumar" , "Mradul Singh"])

ser.str.split()[0][0][0] , ser.str.split()[0][0][1] , ser.str.split()[0][0][2] , ser.str.split()[0][0] , ser.str.split()[0] , ser.str.split()[1]
Out[233]:
('S', 'h', 'u', 'Shubham', ['Shubham', 'Patel'], ['Satyam', 'Chourasia'])
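split() with expand=True returns a DataFrame with one column per token, instead of a Series of Python lists; a small sketch:

```python
import pandas as pd

ser = pd.Series(["Shubham Patel", "Satyam Chourasia"])

# each whitespace-separated token becomes its own column
parts = ser.str.split(expand=True)
print(parts[0].tolist())  # ['Shubham', 'Satyam']
print(parts[1].tolist())  # ['Patel', 'Chourasia']
```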

(c) Series.str count(), contains(), startswith(), endswith(), find(), replace()¶

In [234]:
# In a notebook only the last expression of a cell is displayed, so only replace() shows below
ser.str.count('a')       # occurrences of 'a' in each string
ser.str.contains('a')    # boolean: does each string contain 'a'
ser.str.startswith('S')
ser.str.endswith('l')
ser.str.find('Singh')    # index of the first match, -1 if absent
ser.str.replace('a' , 'A')
Out[234]:
0       ShubhAm PAtel
1    SAtyAm ChourAsiA
2       ChAndAn KumAr
3        MrAdul Singh
dtype: object

(d) Converting a Series to List¶

In [235]:
print(ser.to_list())
sr = ser.to_list()
serr = pd.Series(sr)
print(serr)
['Shubham Patel', 'Satyam Chourasia', 'Chandan Kumar', 'Mradul Singh']
0       Shubham Patel
1    Satyam Chourasia
2       Chandan Kumar
3        Mradul Singh
dtype: object

Module @2¶

(a) Creating a DataFrame with pandas¶

In [236]:
lst = [1,2,3,4,5,6,7,8,9,10]
df =pd.DataFrame(lst)
lst1 = [['Satyam' ,'20'] , ['Shubham' , '21'] , ['Chandan' , '22'] , ['Mradul' , '23']]
df1 = pd.DataFrame(lst1)
df1
Out[236]:
0 1
0 Satyam 20
1 Shubham 21
2 Chandan 22
3 Mradul 23
In [237]:
dct = {'Name' : pd.Series(['Satyam' ,'Shubham' , 'Chandan' , 'Mradul']) , 'Age' :pd.Series( [20,21,22,23]) , 'Address' : pd.Series(['Bihar' , 'UP' , 'MP' , 'Delhi']) , 'Phone' : pd.Series([1234567890 , 1234567890 , 1234567890 , 1234567890])}
df2 = pd.DataFrame(dct)
df2['Name']
Out[237]:
0     Satyam
1    Shubham
2    Chandan
3     Mradul
Name: Name, dtype: object

(b) Slicing using the loc and iloc functions¶

In [238]:
df2.loc[0:2, 'Name' : 'Address']
# loc selects by label: pass the row labels and column labels you want to access
# both slice endpoints are inclusive with loc
Out[238]:
Name Age Address
0 Satyam 20 Bihar
1 Shubham 21 UP
2 Chandan 22 MP
In [239]:
df2.iloc[:, :]
# iloc uses positional (0-based) indexing, as in Python lists
# rows and columns can also be selected with lists of positions
Out[239]:
Name Age Address Phone
0 Satyam 20 Bihar 1234567890
1 Shubham 21 UP 1234567890
2 Chandan 22 MP 1234567890
3 Mradul 23 Delhi 1234567890
In [240]:
df2.iloc[[1,3] , [1,-1]]
# this is another way of accessing the data using the list
Out[240]:
Age Phone
1 21 1234567890
3 23 1234567890

(c) Slicing using Conditions¶

In [241]:
dff = df2.loc[df2['Age']>20 , ['Name' , 'Age']]
print(dff)
# by lambda
df2.loc[df2['Name'].apply(lambda x : len(x)>6) ]
      Name  Age
1  Shubham   21
2  Chandan   22
3   Mradul   23
Out[241]:
Name Age Address Phone
1 Shubham 21 UP 1234567890
2 Chandan 22 MP 1234567890
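Several conditions can be combined with & (and) and | (or); each comparison must be wrapped in parentheses because of Python operator precedence. A small sketch on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({'Name': ['Satyam', 'Shubham', 'Chandan'],
                   'Age': [20, 21, 22]})

# parentheses are required around each comparison
mask = (df['Age'] > 20) & (df['Name'].str.startswith('S'))
print(df.loc[mask, 'Name'].tolist())  # ['Shubham']
```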

(d) adding a new column in a dataframe¶

In [242]:
df2['Height'] = pd.Series([5.5 , 5.6 , 5.7 , 5.8])
df2['Age']+=10
df2
Out[242]:
Name Age Address Phone Height
0 Satyam 30 Bihar 1234567890 5.5
1 Shubham 31 UP 1234567890 5.6
2 Chandan 32 MP 1234567890 5.7
3 Mradul 33 Delhi 1234567890 5.8
In [243]:
df2['weight'] = pd.Series([73, 65 , 55 , 95])
df2['ones'] = pd.Series([1,11,111,1111])
df2['twos'] = pd.Series([2,22,222,2222])
df2
Out[243]:
Name Age Address Phone Height weight ones twos
0 Satyam 30 Bihar 1234567890 5.5 73 1 2
1 Shubham 31 UP 1234567890 5.6 65 11 22
2 Chandan 32 MP 1234567890 5.7 55 111 222
3 Mradul 33 Delhi 1234567890 5.8 95 1111 2222

(e) deleting a column¶

using del, using pop()

In [244]:
del df2['ones']
df2
Out[244]:
Name Age Address Phone Height weight twos
0 Satyam 30 Bihar 1234567890 5.5 73 2
1 Shubham 31 UP 1234567890 5.6 65 22
2 Chandan 32 MP 1234567890 5.7 55 222
3 Mradul 33 Delhi 1234567890 5.8 95 2222
In [245]:
df2.pop('twos')
df2
Out[245]:
Name Age Address Phone Height weight
0 Satyam 30 Bihar 1234567890 5.5 73
1 Shubham 31 UP 1234567890 5.6 65
2 Chandan 32 MP 1234567890 5.7 55
3 Mradul 33 Delhi 1234567890 5.8 95

(f) Adding a row¶

In [246]:
df2 = pd.concat([df1,df2]).reset_index(drop = True)
# Columns missing from one of the DataFrames are filled with NaN (i.e. null)
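A single row can also be appended by assigning to a new index label with .loc; a small sketch on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({'Name': ['Satyam'], 'Age': [20]})

# assigning to a label one past the end appends a row
df.loc[len(df)] = ['Shubham', 21]
print(df.shape)             # (2, 2)
print(df['Name'].tolist())  # ['Satyam', 'Shubham']
```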

(g) Pandas drop() function¶

In [247]:
df2
Out[247]:
0 1 Name Age Address Phone Height weight
0 Satyam 20 NaN NaN NaN NaN NaN NaN
1 Shubham 21 NaN NaN NaN NaN NaN NaN
2 Chandan 22 NaN NaN NaN NaN NaN NaN
3 Mradul 23 NaN NaN NaN NaN NaN NaN
4 NaN NaN Satyam 30.0 Bihar 1.234568e+09 5.5 73.0
5 NaN NaN Shubham 31.0 UP 1.234568e+09 5.6 65.0
6 NaN NaN Chandan 32.0 MP 1.234568e+09 5.7 55.0
7 NaN NaN Mradul 33.0 Delhi 1.234568e+09 5.8 95.0
In [248]:
df2.drop([0,1] ,axis = 1 , inplace=True)
In [249]:
# df2.drop([1,2,3,4] , axis =0 , inplace=True)
df2
Out[249]:
Name Age Address Phone Height weight
0 NaN NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN
4 Satyam 30.0 Bihar 1.234568e+09 5.5 73.0
5 Shubham 31.0 UP 1.234568e+09 5.6 65.0
6 Chandan 32.0 MP 1.234568e+09 5.7 55.0
7 Mradul 33.0 Delhi 1.234568e+09 5.8 95.0

(h) Transpose¶

In [251]:
df2.T
Out[251]:
0 1 2 3 4 5 6 7
Name NaN NaN NaN NaN Satyam Shubham Chandan Mradul
Age NaN NaN NaN NaN 30.0 31.0 32.0 33.0
Address NaN NaN NaN NaN Bihar UP MP Delhi
Phone NaN NaN NaN NaN 1234567890.0 1234567890.0 1234567890.0 1234567890.0
Height NaN NaN NaN NaN 5.5 5.6 5.7 5.8
weight NaN NaN NaN NaN 73.0 65.0 55.0 95.0

Some Extra DataFrame Functionalities¶

In [252]:
df2.axes
Out[252]:
[RangeIndex(start=0, stop=8, step=1),
 Index(['Name', 'Age', 'Address', 'Phone', 'Height', 'weight'], dtype='object')]
In [253]:
df.ndim
# The .ndim attribute gives the number of dimensions, which is always 2 for a DataFrame (rows and columns)
Out[253]:
2
In [254]:
df.dtypes
Out[254]:
0    int64
dtype: object
In [255]:
df2.shape
# The .shape attribute gives the dimensions of the DataFrame as a (rows, columns) tuple
Out[255]:
(8, 6)
In [256]:
df.head()
# Gives the first 5 rows of the dataset
# By default n=5, but any number of rows can be requested, e.g. df.head(3)
Out[256]:
0
0 1
1 2
2 3
3 4
4 5
In [257]:
df.tail()
# Gives the last 5 rows of the dataset
# By default n=5, but any number of rows can be requested, e.g. df.tail(3)
Out[257]:
0
5 6
6 7
7 8
8 9
9 10
In [258]:
df.empty
Out[258]:
False

Mathematical and Statistical Functions¶

mean() , median() , mode() , sum() , min() , max() , describe() , std() , var()

In [259]:
df3 = pd.DataFrame({'one' : pd.Series([9,823,191,10,12]) , 
                    'two' : pd.Series([10 ,40 ,45 ,89 ,32]) ,
                    'three' : pd.Series([1000,2000,4000,2500,1202]) ,
                    'four': pd.Series([1200,4002,4332,6533,2343]) ,
                    'five' : pd.Series([100 ,500 ,200 ,350 ,583])})
df3
Out[259]:
one two three four five
0 9 10 1000 1200 100
1 823 40 2000 4002 500
2 191 45 4000 4332 200
3 10 89 2500 6533 350
4 12 32 1202 2343 583
In [260]:
df3.describe()
Out[260]:
one two three four five
count 5.000000 5.000000 5.000000 5.000000 5.000000
mean 209.000000 43.200000 2140.400000 3682.000000 346.600000
std 352.040481 28.891175 1202.938402 2037.544233 201.079089
min 9.000000 10.000000 1000.000000 1200.000000 100.000000
25% 10.000000 32.000000 1202.000000 2343.000000 200.000000
50% 12.000000 40.000000 2000.000000 4002.000000 350.000000
75% 191.000000 45.000000 2500.000000 4332.000000 500.000000
max 823.000000 89.000000 4000.000000 6533.000000 583.000000
In [261]:
df3.mean()
Out[261]:
one       209.0
two        43.2
three    2140.4
four     3682.0
five      346.6
dtype: float64
In [262]:
df3.max()
Out[262]:
one       823
two        89
three    4000
four     6533
five      583
dtype: int64
In [263]:
df3.min()
Out[263]:
one         9
two        10
three    1000
four     1200
five      100
dtype: int64
In [264]:
df3.mode()
Out[264]:
one two three four five
0 9 10 1000 1200 100
1 10 32 1202 2343 200
2 12 40 2000 4002 350
3 191 45 2500 4332 500
4 823 89 4000 6533 583
In [265]:
df3.sum()
Out[265]:
one       1045
two        216
three    10702
four     18410
five      1733
dtype: int64
In [266]:
df3.std()
Out[266]:
one       352.040481
two        28.891175
three    1202.938402
four     2037.544233
five      201.079089
dtype: float64
In [267]:
df3.var()
Out[267]:
one       123932.5
two          834.7
three    1447060.8
four     4151586.5
five       40432.8
dtype: float64
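Note that pandas std() and var() compute the sample statistic (ddof=1) by default, while NumPy's functions default to the population statistic (ddof=0); a small sketch of the difference:

```python
import pandas as pd
import numpy as np

s = pd.Series([1, 2, 3, 4])

print(s.var())               # 1.666... (sample variance, ddof=1)
print(np.var(s.to_numpy()))  # 1.25     (population variance, ddof=0)
```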

Pipe() function¶

The pipe() function in pandas applies a function to a DataFrame. It is similar to apply(), but pipe() is designed for method chaining: the output of one operation becomes the input of the next, and so on.

In [268]:
df3
Out[268]:
one two three four five
0 9 10 1000 1200 100
1 823 40 2000 4002 500
2 191 45 4000 4332 200
3 10 89 2500 6533 350
4 12 32 1202 2343 583
In [269]:
def add_(i,j):
    return i+j

def subs_(i,j):
    return i-j

def mean_(col):
    return col.mean()

def square_(j):
    return j**2
In [270]:
df3.pipe(add_ , 10)
Out[270]:
one two three four five
0 19 20 1010 1210 110
1 833 50 2010 4012 510
2 201 55 4010 4342 210
3 20 99 2510 6543 360
4 22 42 1212 2353 593
In [271]:
df3.pipe(subs_ , 5)
Out[271]:
one two three four five
0 4 5 995 1195 95
1 818 35 1995 3997 495
2 186 40 3995 4327 195
3 5 84 2495 6528 345
4 7 27 1197 2338 578
In [272]:
df3.pipe(mean_).pipe(square_) # output of one goes into the input of another function
Out[272]:
one         43681.00
two          1866.24
three     4581312.16
four     13557124.00
five       120131.56
dtype: float64

Apply() function¶

The apply() function in pandas applies a function along an axis of a DataFrame: to each column by default, or to each row with axis=1. The function can be built-in or user-defined.

In [273]:
df3
Out[273]:
one two three four five
0 9 10 1000 1200 100
1 823 40 2000 4002 500
2 191 45 4000 4332 200
3 10 89 2500 6533 350
4 12 32 1202 2343 583
In [274]:
df3.apply(np.mean)
Out[274]:
one       209.0
two        43.2
three    2140.4
four     3682.0
five      346.6
dtype: float64
In [275]:
df3.apply(np.median)
Out[275]:
one        12.0
two        40.0
three    2000.0
four     4002.0
five      350.0
dtype: float64
In [276]:
df3.apply(lambda x: x.max() - x.min())
Out[276]:
one       814
two        79
three    3000
four     5333
five      483
dtype: int64
In [277]:
df3[['one','two']].apply(lambda x: x.max() - x.min() )
Out[277]:
one    814
two     79
dtype: int64
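With axis=1, apply() passes each row to the function instead of each column; a small sketch:

```python
import pandas as pd

df = pd.DataFrame({'one': [1, 2], 'two': [10, 20]})

# axis=1: the lambda receives one row (a Series) at a time
row_sums = df.apply(lambda row: row['one'] + row['two'], axis=1)
print(row_sums.tolist())  # [11, 22]
```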

Apply map() function¶

The applymap() function in pandas applies a function to every individual element of a DataFrame. The function can be built-in or user-defined, and a new DataFrame is returned in which each element has been transformed. applymap() works only element-wise, whereas apply() works element-wise as well as row- or column-wise.

applymap() has been deprecated, so use map() instead¶

In [278]:
df = pd.DataFrame({'A': [1.2 , 3.4 , 5.6] ,
                    'B' :[4.8 , 5.9 , 3.98]})
df

df_1 =df.map(np.int64)
print(df_1)

df_2 = df.apply(lambda x: x.var() , axis=0)
print(df_2)
   A  B
0  1  4
1  3  5
2  5  3
A    4.840000
B    0.928133
dtype: float64

Re-Index function¶

In [279]:
print(df3)
print('-'*50)
print(df3.reindex([0,1,2,3,4]))
print('-'*50)
print(df3.reindex([4,3,2,1,0]).reset_index(drop = True))
print('-'*50)
print(df3)
   one  two  three  four  five
0    9   10   1000  1200   100
1  823   40   2000  4002   500
2  191   45   4000  4332   200
3   10   89   2500  6533   350
4   12   32   1202  2343   583
--------------------------------------------------
   one  two  three  four  five
0    9   10   1000  1200   100
1  823   40   2000  4002   500
2  191   45   4000  4332   200
3   10   89   2500  6533   350
4   12   32   1202  2343   583
--------------------------------------------------
   one  two  three  four  five
0   12   32   1202  2343   583
1   10   89   2500  6533   350
2  191   45   4000  4332   200
3  823   40   2000  4002   500
4    9   10   1000  1200   100
--------------------------------------------------
   one  two  three  four  five
0    9   10   1000  1200   100
1  823   40   2000  4002   500
2  191   45   4000  4332   200
3   10   89   2500  6533   350
4   12   32   1202  2343   583
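reindex() can also introduce labels that do not exist yet; such rows are filled with NaN unless fill_value is given. A small sketch on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({'one': [9, 823]})

# label 2 does not exist, so it is created and filled with fill_value
expanded = df.reindex([0, 1, 2], fill_value=0)
print(expanded['one'].tolist())  # [9, 823, 0]
```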

rename function¶

In [280]:
df3.rename(columns={'one' : 'ONE' , 'two' : 'TWO' , 'three' : 'THREE' , 'four' : 'FOUR' , 'five' : 'FIVE'} , inplace=True)
df3
Out[280]:
ONE TWO THREE FOUR FIVE
0 9 10 1000 1200 100
1 823 40 2000 4002 500
2 191 45 4000 4332 200
3 10 89 2500 6533 350
4 12 32 1202 2343 583

sort_values¶

In [281]:
# Note: inplace=True here modifies only the temporary object returned by
# sort_values(), so df3 itself is left unchanged (as the next output shows)
df3.sort_values(by = ['ONE' ,'FOUR'],ascending=False , kind='mergesort').reset_index(drop = True , inplace=True)
In [282]:
df3
Out[282]:
ONE TWO THREE FOUR FIVE
0 9 10 1000 1200 100
1 823 40 2000 4002 500
2 191 45 4000 4332 200
3 10 89 2500 6533 350
4 12 32 1202 2343 583
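The output above shows df3 still unsorted, because chaining inplace=True onto reset_index() modifies only a temporary object. The idiomatic way to keep a sorted copy is to reassign, optionally using ignore_index=True to renumber in the same call; a small sketch on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({'ONE': [9, 823, 191], 'FOUR': [1200, 4002, 4332]})

# reassignment keeps the sorted result; ignore_index=True renumbers 0..n-1
df = df.sort_values(by=['ONE', 'FOUR'], ascending=False, ignore_index=True)
print(df['ONE'].tolist())  # [823, 191, 9]
```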

Groupby¶

In [283]:
df4 = pd.DataFrame({'Team':['India' , 'India', 'Australia' , 'Australia' , 'SA' ,'SA' , 'SA' , 'SA' , 'SA' , 'NewZealand', 'NewZealand' , 'NewZealand' , 'NewZealand' , 'India'] , 
                    'Rank':[1,2,3,4,5,6,7,8,9,10,11,12,13,14] , 
                    'Year':[2011,2012,2011,2012,2011,2012,2013,2014,2015,2011,2012,2013,2014,2015] , 
                    'Points':[874,787,117,111,102,112,112,98,88,100,98,92,90,79]})
print(df4)
print('-'*50)
df4.sort_values(by='Team' , inplace=True)
print(df4)
print('-'*50)

df4.groupby('Team').groups
          Team  Rank  Year  Points
0        India     1  2011     874
1        India     2  2012     787
2    Australia     3  2011     117
3    Australia     4  2012     111
4           SA     5  2011     102
5           SA     6  2012     112
6           SA     7  2013     112
7           SA     8  2014      98
8           SA     9  2015      88
9   NewZealand    10  2011     100
10  NewZealand    11  2012      98
11  NewZealand    12  2013      92
12  NewZealand    13  2014      90
13       India    14  2015      79
--------------------------------------------------
          Team  Rank  Year  Points
2    Australia     3  2011     117
3    Australia     4  2012     111
0        India     1  2011     874
1        India     2  2012     787
13       India    14  2015      79
9   NewZealand    10  2011     100
10  NewZealand    11  2012      98
11  NewZealand    12  2013      92
12  NewZealand    13  2014      90
4           SA     5  2011     102
5           SA     6  2012     112
6           SA     7  2013     112
7           SA     8  2014      98
8           SA     9  2015      88
--------------------------------------------------
Out[283]:
{'Australia': [2, 3], 'India': [0, 1, 13], 'NewZealand': [9, 10, 11, 12], 'SA': [4, 5, 6, 7, 8]}
In [284]:
df4.groupby(['Team' , 'Year']).get_group(('India' , 2011))
Out[284]:
Team Rank Year Points
0 India 1 2011 874
In [285]:
df4.groupby('Team')['Points'].sum()
Out[285]:
Team
Australia      228
India         1740
NewZealand     380
SA             512
Name: Points, dtype: int64
In [286]:
df4.groupby('Team').sum()['Points'].sort_values(ascending=True)
Out[286]:
Team
Australia      228
NewZealand     380
SA             512
India         1740
Name: Points, dtype: int64
In [287]:
groups = df4.groupby('Team')
# type(groups['Points'])
# Passing method names as strings avoids the FutureWarning that recent pandas
# raises when NumPy callables such as np.sum are handed to agg()
groups['Points'].agg(['sum', 'mean', 'std', 'var', 'min', 'max', 'median', np.count_nonzero, 'size', 'prod'])
Out[287]:
sum mean std var min max median count_nonzero size prod
Team
Australia 228 114.0 4.242641 18.000000 111 117 114.0 2 2 12987
India 1740 580.0 436.053896 190143.000000 79 874 787.0 3 3 54339202
NewZealand 380 95.0 4.760952 22.666667 90 100 95.0 4 4 81144000
SA 512 102.4 10.139033 102.800000 88 112 102.0 5 5 11034304512
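Named aggregation is another way to compute per-group summaries with explicit output column names; a small sketch on a reduced frame:

```python
import pandas as pd

df4 = pd.DataFrame({'Team': ['India', 'India', 'SA'],
                    'Points': [874, 787, 102]})

# each keyword becomes an output column: (input column, aggregation)
out = df4.groupby('Team').agg(total=('Points', 'sum'),
                              best=('Points', 'max'))
print(out.loc['India', 'total'])  # 1661
print(out.loc['SA', 'best'])      # 102
```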
In [298]:
df4.groupby('Team').filter(lambda x: len(x)==5)
Out[298]:
Team Rank Year Points
4 SA 5 2011 102
5 SA 6 2012 112
6 SA 7 2013 112
7 SA 8 2014 98
8 SA 9 2015 88

Module @3¶

Reading a CSV file and working on it¶

In [305]:
data = pd.read_csv('./csv_files/deliveries.csv')
data
Out[305]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over ... bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 4 0 4 NaN NaN NaN
3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 4 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 5 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 2 2 NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
150455 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 2 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 2 0 2 NaN NaN NaN
150456 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 3 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 0 0 0 CJ Jordan run out NV Ojha
150457 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 4 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 1 0 0 0 1 1 NaN NaN NaN
150458 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 5 Sachin Baby Iqbal Abdulla B Kumar 0 ... 0 0 0 0 1 0 1 NaN NaN NaN
150459 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 6 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 0 0 0 4 0 4 NaN NaN NaN

150460 rows × 21 columns

In [306]:
data.head()
Out[306]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over ... bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 4 0 4 NaN NaN NaN
3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 4 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 5 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 2 2 NaN NaN NaN

5 rows × 21 columns

In [307]:
data.tail()
Out[307]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over ... bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
150455 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 2 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 2 0 2 NaN NaN NaN
150456 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 3 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 0 0 0 CJ Jordan run out NV Ojha
150457 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 4 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 1 0 0 0 1 1 NaN NaN NaN
150458 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 5 Sachin Baby Iqbal Abdulla B Kumar 0 ... 0 0 0 0 1 0 1 NaN NaN NaN
150459 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 6 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 0 0 0 4 0 4 NaN NaN NaN

5 rows × 21 columns

In [309]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150460 entries, 0 to 150459
Data columns (total 21 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   match_id          150460 non-null  int64 
 1   inning            150460 non-null  int64 
 2   batting_team      150460 non-null  object
 3   bowling_team      150460 non-null  object
 4   over              150460 non-null  int64 
 5   ball              150460 non-null  int64 
 6   batsman           150460 non-null  object
 7   non_striker       150460 non-null  object
 8   bowler            150460 non-null  object
 9   is_super_over     150460 non-null  int64 
 10  wide_runs         150460 non-null  int64 
 11  bye_runs          150460 non-null  int64 
 12  legbye_runs       150460 non-null  int64 
 13  noball_runs       150460 non-null  int64 
 14  penalty_runs      150460 non-null  int64 
 15  batsman_runs      150460 non-null  int64 
 16  extra_runs        150460 non-null  int64 
 17  total_runs        150460 non-null  int64 
 18  player_dismissed  7438 non-null    object
 19  dismissal_kind    7438 non-null    object
 20  fielder           5369 non-null    object
dtypes: int64(13), object(8)
memory usage: 24.1+ MB
In [314]:
data.describe(percentiles=[0.90])
Out[314]:
match_id inning over ball is_super_over wide_runs bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs
count 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000 150460.000000
mean 318.281317 1.482188 10.142649 3.616483 0.000538 0.037498 0.004885 0.022232 0.004340 0.000066 1.222445 0.069022 1.291466
std 182.955531 0.501768 5.674338 1.807698 0.023196 0.257398 0.114234 0.200104 0.072652 0.018229 1.594509 0.349667 1.583240
min 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 319.000000 1.000000 10.000000 4.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 1.000000
90% 573.000000 2.000000 18.000000 6.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 4.000000 0.000000 4.000000
max 636.000000 4.000000 20.000000 9.000000 1.000000 5.000000 4.000000 5.000000 5.000000 5.000000 6.000000 7.000000 7.000000
In [312]:
data.isnull().sum()
Out[312]:
match_id                 0
inning                   0
batting_team             0
bowling_team             0
over                     0
ball                     0
batsman                  0
non_striker              0
bowler                   0
is_super_over            0
wide_runs                0
bye_runs                 0
legbye_runs              0
noball_runs              0
penalty_runs             0
batsman_runs             0
extra_runs               0
total_runs               0
player_dismissed    143022
dismissal_kind      143022
fielder             145091
dtype: int64
In [317]:
data1 = pd.read_csv('./csv_files/matches.csv')
In [319]:
data1
Out[319]:
id season city date team1 team2 toss_winner toss_decision result dl_applied winner win_by_runs win_by_wickets player_of_match venue umpire1 umpire2 umpire3
0 1 2017 Hyderabad 2017-04-05 Sunrisers Hyderabad Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Sunrisers Hyderabad 35 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal AY Dandekar NJ Llong NaN
1 2 2017 Pune 2017-04-06 Mumbai Indians Rising Pune Supergiant Rising Pune Supergiant field normal 0 Rising Pune Supergiant 0 7 SPD Smith Maharashtra Cricket Association Stadium A Nand Kishore S Ravi NaN
2 3 2017 Rajkot 2017-04-07 Gujarat Lions Kolkata Knight Riders Kolkata Knight Riders field normal 0 Kolkata Knight Riders 0 10 CA Lynn Saurashtra Cricket Association Stadium Nitin Menon CK Nandan NaN
3 4 2017 Indore 2017-04-08 Rising Pune Supergiant Kings XI Punjab Kings XI Punjab field normal 0 Kings XI Punjab 0 6 GJ Maxwell Holkar Cricket Stadium AK Chaudhary C Shamshuddin NaN
4 5 2017 Bangalore 2017-04-08 Royal Challengers Bangalore Delhi Daredevils Royal Challengers Bangalore bat normal 0 Royal Challengers Bangalore 15 0 KM Jadhav M Chinnaswamy Stadium NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
631 632 2016 Raipur 2016-05-22 Delhi Daredevils Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Royal Challengers Bangalore 0 6 V Kohli Shaheed Veer Narayan Singh International Stadium A Nand Kishore BNJ Oxenford NaN
632 633 2016 Bangalore 2016-05-24 Gujarat Lions Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Royal Challengers Bangalore 0 4 AB de Villiers M Chinnaswamy Stadium AK Chaudhary HDPK Dharmasena NaN
633 634 2016 Delhi 2016-05-25 Sunrisers Hyderabad Kolkata Knight Riders Kolkata Knight Riders field normal 0 Sunrisers Hyderabad 22 0 MC Henriques Feroz Shah Kotla M Erasmus C Shamshuddin NaN
634 635 2016 Delhi 2016-05-27 Gujarat Lions Sunrisers Hyderabad Sunrisers Hyderabad field normal 0 Sunrisers Hyderabad 0 4 DA Warner Feroz Shah Kotla M Erasmus CK Nandan NaN
635 636 2016 Bangalore 2016-05-29 Sunrisers Hyderabad Royal Challengers Bangalore Sunrisers Hyderabad bat normal 0 Sunrisers Hyderabad 8 0 BCJ Cutting M Chinnaswamy Stadium HDPK Dharmasena BNJ Oxenford NaN

636 rows × 18 columns

In [321]:
data1['season'].quantile(.99)
Out[321]:
2017.0
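`quantile()` also accepts a list of quantiles and returns a Series indexed by them. A small sketch with a stand-in Series of seasons:

```python
import pandas as pd

s = pd.Series([2016, 2016, 2017, 2017, 2017])

# Passing a list returns one value per requested quantile
q = s.quantile([0.5, 0.99])
print(q)
```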
In [329]:
# unique() returns an array of the distinct player_of_match names
data1['player_of_match'].unique()
In [328]:
# nunique() returns the number of distinct player_of_match values (NaN excluded)
data1['player_of_match'].nunique()
Out[328]:
201
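`unique()` lists the distinct values and `nunique()` counts them; a related method, `value_counts()`, also counts how often each value occurs. A minimal sketch with a stand-in Series of player names:

```python
import pandas as pd

s = pd.Series(['Yuvraj Singh', 'SPD Smith', 'CA Lynn', 'SPD Smith'])

print(s.unique())         # distinct names, in order of first appearance
print(s.nunique())        # number of distinct names
print(s.value_counts())   # each name with its frequency, most common first
```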

dropna()¶

In [330]:
data1.isnull().sum()
Out[330]:
id                   0
season               0
city                 7
date                 0
team1                0
team2                0
toss_winner          0
toss_decision        0
result               0
dl_applied           0
winner               3
win_by_runs          0
win_by_wickets       0
player_of_match      3
venue                0
umpire1              1
umpire2              1
umpire3            636
dtype: int64
In [332]:
data1.dropna(inplace=True)  # caution: umpire3 is NaN in all 636 rows, so this drops every row
In [333]:
data1.isnull().sum()
Out[333]:
id                 0
season             0
city               0
date               0
team1              0
team2              0
toss_winner        0
toss_decision      0
result             0
dl_applied         0
winner             0
win_by_runs        0
win_by_wickets     0
player_of_match    0
venue              0
umpire1            0
umpire2            0
umpire3            0
dtype: int64
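The all-zero counts above are misleading: because `umpire3` was missing in every one of the 636 rows, the default `dropna()` discarded the entire frame, and the sums are over an empty DataFrame. The `subset`, `how`, and `axis` parameters avoid this. A minimal sketch with a stand-in frame mirroring the situation:

```python
import pandas as pd
import numpy as np

# Stand-in frame: umpire3 is NaN in every row, like the dataset above
df = pd.DataFrame({
    'winner': ['SRH', np.nan, 'KKR'],
    'umpire3': [np.nan, np.nan, np.nan],
})

all_dropped = df.dropna()                        # empty: every row has a NaN
kept = df.dropna(subset=['winner'])              # only checks the winner column
cleaned = df.dropna(axis=1, how='all').dropna()  # drop the all-NaN column first
print(len(all_dropped), len(kept), len(cleaned))
```

Dropping the all-NaN column first (or restricting the check with `subset`) keeps the rows that are actually usable.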

fillna()¶

In [336]:
# data.isnull().sum()
data
Out[336]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over ... bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 4 0 4 NaN NaN NaN
3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 4 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 NaN NaN NaN
4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 5 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 2 2 NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
150455 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 2 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 2 0 2 NaN NaN NaN
150456 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 3 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 0 0 0 CJ Jordan run out NV Ojha
150457 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 4 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 1 0 0 0 1 1 NaN NaN NaN
150458 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 5 Sachin Baby Iqbal Abdulla B Kumar 0 ... 0 0 0 0 1 0 1 NaN NaN NaN
150459 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 6 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 0 0 0 4 0 4 NaN NaN NaN

150460 rows × 21 columns

In [338]:
data.fillna('dont know', inplace=True)  # replace every NaN with the placeholder string 'dont know'
In [339]:
data.isnull().sum()
Out[339]:
match_id            0
inning              0
batting_team        0
bowling_team        0
over                0
ball                0
batsman             0
non_striker         0
bowler              0
is_super_over       0
wide_runs           0
bye_runs            0
legbye_runs         0
noball_runs         0
penalty_runs        0
batsman_runs        0
extra_runs          0
total_runs          0
player_dismissed    0
dismissal_kind      0
fielder             0
dtype: int64
In [340]:
data
Out[340]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over ... bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 dont know dont know dont know
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 dont know dont know dont know
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 4 0 4 dont know dont know dont know
3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 4 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 0 0 dont know dont know dont know
4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 5 DA Warner S Dhawan TS Mills 0 ... 0 0 0 0 0 2 2 dont know dont know dont know
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
150455 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 2 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 2 0 2 dont know dont know dont know
150456 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 3 Sachin Baby CJ Jordan B Kumar 0 ... 0 0 0 0 0 0 0 CJ Jordan run out NV Ojha
150457 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 4 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 1 0 0 0 1 1 dont know dont know dont know
150458 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 5 Sachin Baby Iqbal Abdulla B Kumar 0 ... 0 0 0 0 1 0 1 dont know dont know dont know
150459 636 2 Royal Challengers Bangalore Sunrisers Hyderabad 20 6 Iqbal Abdulla Sachin Baby B Kumar 0 ... 0 0 0 0 4 0 4 dont know dont know dont know

150460 rows × 21 columns
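Filling every column with one string works, but it puts text into numeric columns too. `fillna()` also accepts a dict mapping each column to its own fill value. A minimal sketch with a stand-in frame:

```python
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'dismissal_kind': [np.nan, 'run out', np.nan],
    'extra_runs': [np.nan, 0, 1],
})

# A dict gives each column a type-appropriate default in one call
df = df.fillna({'dismissal_kind': 'not out', 'extra_runs': 0})
print(df)
```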

to_csv(): create a new CSV file¶

In [344]:
df3.to_csv('./csv_files/Numbers.csv' , index=False)
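`index=False` keeps the row index out of the file, so reading it back reproduces the original frame. A minimal round-trip sketch (using an in-memory buffer instead of a file path, and a small stand-in for the notebook's `df3`):

```python
import io
import pandas as pd

df3 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})  # stand-in for the notebook's df3

# to_csv() with no path returns the CSV text; index=False omits the index column
csv_text = df3.to_csv(index=False)
restored = pd.read_csv(io.StringIO(csv_text))
print(restored.equals(df3))
```

Without `index=False`, the saved file gains an extra unnamed column holding the old index, which reappears on every subsequent `read_csv`.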